/*-------------------------<-- Start of Description -->-------------------------*\ | PURPOSE: This macro breaks a dataset into smaller datasets, sorts those | | smaller datasets, and then merges back together in ordre. It is | | designed for sorting a very large dataset more efficiently. | |--------------------------<-- End of Description -->----------------------------| |--------------------------------------------------------------------------------| |------------------------<-- Start of Files Created -->--------------------------| | SYNTAX: %checkall (indata = _SAS_dataset_name_, | | outdata = _SAS_output_dataset_name_, | | by = _SAS_sorted_by_variable_list_, | | parts = _number_of_pieces_to_break_into_) | \-------------------------<-- End of Files Created-->---------------------------*/ %macro bigsort(indata=, outdata=, by=, parts=10) / des='Sorts very large datasets'; %local indata outdata by notes starttime i; %let notes = %sysfunc(getoption(notes,keyword)); options nonotes; %let starttime = %sysfunc(datetime()); /*---------------------------------------------------\ | complete simple diagnostic of PARTS macro variable | \---------------------------------------------------*/ %if &parts < 5 %then %let parts = 5; %else %if &parts > 25 %then %let parts = 25; /*-------------------------------------------------------\ | if the data is less than 500,000 records, just sort it | +--------------------------------------------------------+-------------------------------------+ | NOTE: Depending on your operating system and environment, you may want to change this value | \---------------------------------------------------------------------------------------------*/ %if %nobs(&indata) < 500000 %then %do; proc sort data=&indata out=&outdata; by &by; run; %end ; %else %do; /*-------------------------------------\ | determine size of each chunk of data | \-------------------------------------*/ %let smallobscnt = %scan(%eval(%nobs(&indata)/&parts), 1) ; /*----------------------------------------------------\ | loop through the first N-1 parts, sorting each part | \----------------------------------------------------*/ %do i = 0 %to %eval(&parts - 2); proc sort data = &indata (firstobs=%eval(&smallobscnt*&i+1) obs=%eval(&smallobscnt*&i + &smallobscnt)) out = sorted%eval(&i+1); by &by ; run ; %end; /*-------------------\ | sort the last part | \-------------------*/ proc sort data=&indata(firstobs=%eval(&smallobscnt*(&parts-1)+1)) out=sorted&parts; by &by; run; /*-----------------------------\ | merge the data back together | \-----------------------------*/ data &outdata; set %_prefix(sorted,1 to &parts); by &by; run; proc datasets library=macro nolist memtype=(view data); delete sorted1-sorted&parts; run; quit; %end; /*----------------------------------\ | prepare SAS session for open code | \----------------------------------*/ options ¬es; %put NOTE: There were %nobs(&indata) observations read from the data set %data(&indata).; %put NOTE: The data set &outdata has %nobs(&outdata) observations and %nvars(&outdata) variables. ; %timenote (macro=bigsort, starttime=&starttime); %put; %mend bigsort ;